library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
September 22,2020
##kBTU Bar Graph depecting electricy and gas use in the bay for the last 3 yars
setwd("~/Desktop/Working_Directory/BayArea/HW1")
year <- 2017
quarters <- 1:14
type <- "Gas"
pge_gas <- NULL
quarter = 1
for(run in quarters) {
if ((run == 5) | (run == 9) | (run == 13)){
year = year + 1
quarter = quarter - 4
}
if ((quarter == 3) & (year == 2020)){
break
}
filename <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp <- read_csv(filename)
pge_gas <- rbind(pge_gas,temp)
quarter = quarter + 1
}
year <- 2017
quarters <- 1:14
type <- "Electric"
pge_electric <- NULL
quarter = 1
for(run in quarters) {
if ((run == 5) | (run == 9) | (run == 13)){
year = year + 1
quarter = quarter - 4
}
if ((quarter == 3) & (year == 2020)){
break
}
filename <-
paste0(
"PGE_",
year,
"_Q",
quarter,
"_",
type,
"UsageByZip.csv"
)
temp <- read_csv(filename)
pge_electric <- rbind(pge_electric,temp)
quarter = quarter + 1
}
pge_filter_elec <- filter(pge_electric, CUSTOMERCLASS %in% c("Elec- Residential","Elec- Commercial"))
pge_filter_gas <- filter(pge_gas, CUSTOMERCLASS %in% c("Gas- Residential","Gas- Commercial"))
pge_select_elec <- mutate(pge_filter_elec, MONTH = MONTH + (12*(YEAR - 2017)) )
pge_select_gas <- mutate(pge_filter_gas, MONTH = MONTH + (12*(YEAR - 2017)) )
pge_select_elec <-select(pge_select_elec ,!c( COMBINED, AVERAGEKWH))
pge_select_gas <-select(pge_select_gas ,!c( COMBINED, AVERAGETHM))
rm(pge_filter_elec, pge_filter_gas)
pge_group_elec <- group_by( pge_select_elec, MONTH, CUSTOMERCLASS, YEAR)
pge_group_gas <- group_by( pge_select_gas, MONTH, CUSTOMERCLASS, YEAR)
rm(pge_select_elec,pge_select_gas)
pge_summarize_elec <- summarize( pge_group_elec, TOTALKWH = sum( TOTALKWH, na.rm = T),
TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))
pge_summarize_gas <- summarize( pge_group_gas, TOTALTHM = sum( TOTALTHM, na.rm = T),
TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))
rm(pge_group_elec,pge_group_gas)
pge_mutate_elec <- mutate(pge_summarize_elec, AVERAGEkBTU = (TOTALKWH * 3412.14 ) /TOTALCUSTOMERS)
pge_mutate_gas <- mutate(pge_summarize_gas , AVERAGEkBTU = (TOTALTHM * 99976.1 )/TOTALCUSTOMERS)
rm(pge_summarize_elec,pge_summarize_gas)
pgeELECT <-select(pge_mutate_elec ,-TOTALKWH)
pgeGAS <-select(pge_mutate_gas , -TOTALTHM )
pge_final<- rbind(pgeELECT,pgeGAS)
rm(pge_mutate_elec,pge_mutate_gas,pgeELECT,pgeGAS)
pge_chart <-
pge_final %>%
ggplot() +
geom_bar(
aes(
# x = MONTH %>% factor(),
x = factor(MONTH),
y = AVERAGEkBTU,
fill = CUSTOMERCLASS
),
stat = "identity",
position = "stack"
) +
labs(
x = "Months Starting Jan 2017",
y = "kBTU",
title = "PG&E Territory Monthly Electricity and Gas Usage, 2017-Present",
fill = "Electricity and Gas"
)+
coord_flip()+
theme(
legend.position = "bottom",
legend.direction = "vertical"
)
pge_chart %>% ggplotly() %>% config(displayModeBar = F)
#plot(pge_chart)
pge_filter_elecNEI <- filter(pge_electric, CUSTOMERCLASS %in% c("Elec- Residential"))
pge_filter_elecNEI <- filter(pge_filter_elecNEI, YEAR %in% c("2019", "2020"))
ElectMonth6 <- filter(pge_filter_elecNEI , MONTH %in% c("6"))
ElectMonth6 <-select(ElectMonth6 ,!c( COMBINED, AVERAGEKWH,CUSTOMERCLASS, MONTH))
ElectMonth6Group <- group_by( ElectMonth6, YEAR, ZIPCODE)
ElectMonth6Sum <- summarize( ElectMonth6Group, TOTALKWH = sum( TOTALKWH, na.rm = T),TOTALCUSTOMERS = sum(TOTALCUSTOMERS, na.rm = T))
Customers <- ElectMonth6Sum$TOTALCUSTOMERS
zeroCustomers<- which(Customers %in% 0)
ElectMonth6Sum <-ElectMonth6Sum[-c(zeroCustomers),]
ElecMutate<- mutate(ElectMonth6Sum, AVERAGEKWH = TOTALKWH /TOTALCUSTOMERS)
Observable changes in energy consumption that may be attributable to the COVID-19 pandemic: There doesnt seem to be any distinct changes in the expected electricity and gas usage during the covid pandemic if we use the prior year as a reference
Create at least one map that highlights which neighborhoods experienced the greatest change in electricity consumption before and after the pandemic began, and comment on your results.
Below is the energy consumption density within the bay area for June 2019 and June 2020. Both density graphs look pretty much identical, with the pandemic graph only showing slighlty more energy consumption
ca_counties <- readRDS("ca_counties.rds")
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
ca_counties %>%
filter(NAME %in% bay_county_names)
bay_cbgs <- block_groups("CA", bay_county_names[1:9], cb = T, progress_bar = F)
usa_zips <- zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
Elec2019 <- filter(ElecMutate, YEAR %in% c("2019"))
Elec2020 <- filter(ElecMutate, YEAR %in% c("2020"))
Elec2019 <- #2020
Elec2019 %>% #2020
mutate(
ZIPCODE = ZIPCODE %>% as.character()
) %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character()
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
) %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
res_pal <- colorNumeric(
palette = "Blues",
domain =
Elec2019$TOTALKWH #2020
)
leaflet() %>%
addTiles() %>%
addPolygons(
data = Elec2019, #2020
fillColor = ~res_pal(TOTALKWH),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(TOTALKWH),
" kWh total in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = Elec2019, #2020
pal = res_pal,
values = ~TOTALKWH,
title = "Energy usage <br>kWh, June 2019"
)
The next plot shows energy usage in the bay area during the pandemic June 2020
Elec2020<-
Elec2020 %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character()
) %>%
mutate(
ZIPCODE = ZIPCODE %>% as.character()
) %>%
group_by(ZIPCODE) %>%
summarize(
TOTALKWH = sum(TOTALKWH, na.rm = T)
) %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326)
res_pal <- colorNumeric(
palette = "Blues",
domain =
Elec2020$TOTALKWH #2020
)
leaflet() %>%
addTiles() %>%
addPolygons(
data = Elec2020, #2020
fillColor = ~res_pal(TOTALKWH),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(TOTALKWH),
" kWh total in ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = Elec2020, #2020
pal = res_pal,
values = ~TOTALKWH,
title = "Energy usage <br>kWh, June 2020"
)
Explain any key assumptions you made in the analysis, or caveats about the data sources that you think the reader should be aware of. Publish all of this work in a web report.
Key assumptions/caveats. We are only able to compare and contrast the energy usage on a census block scale because so we are not able to see variations from one neighborhood to the next.